module d_tree_sitter.parser;

extern (C):

import d_tree_sitter.language;
import d_tree_sitter.tree;
import d_tree_sitter.tree_visitor;
import d_tree_sitter.tree_printer;
import d_tree_sitter.libc : TSTree;

import std.typecons : Nullable;
import std.format : format;
import std.string : fromStringz, toStringz;

/** A stateful object that is used to produce a `Tree` based on some source code */
struct Parser
{
  import d_tree_sitter.libc : TSParser, ts_parser_new, ts_parser_delete,
    ts_parser_language, ts_parser_set_language, ts_parser_logger, TSLogger,
    ts_parser_print_dot_graphs, ts_parser_parse, ts_parser_parse_string,
    ts_parser_parse_string_encoding, TSInput, TSInputEncoding;
  import std.stdio : File;

  /** internal TSParser */
  TSParser* tsparser;

  /** Create a new Parser for the given language.
    NOTE: It assumes that the language is compatible (uses `set_language_nothrow`).
    Params:
      language = the language you want to create a parser for
  */
  this(in Language language) nothrow @nogc
  {
    // Create a parser.
    this.tsparser = ts_parser_new();

    // Set the parser's language.
    const success = this.set_language_nothrow(language);
    assert(success);
  }

  @disable this();
  @disable this(this);

  /** Stops dot-graph output and releases the underlying `TSParser`. */
  ~this() @nogc nothrow
  {
    // A `Parser.init` value (or a moved-from instance) owns no TSParser.
    // `ts_parser_print_dot_graphs` must not be called with a null parser,
    // so bail out before touching the C API.
    if (this.tsparser is null)
    {
      return;
    }
    stop_printing_dot_graphs();
    ts_parser_delete(this.tsparser);
    this.tsparser = null;
  }

  /**
   * Set the language that the parser should use for parsing.
   *
   * NOTE it assumes that the language is compatible (no version check is done here).
   * Returns a boolean indicating whether or not the language was successfully
   * assigned.
   */
  auto set_language_nothrow(in Language language) nothrow
  {
    return ts_parser_set_language(tsparser, language.tslanguage);
  }

  /**
   * Set the language that the parser should use for parsing.
   *
   * Returns a boolean indicating whether or not the language was successfully
   * assigned. True means assignment succeeded. False means there was a version
   * mismatch, the language was generated with an incompatible version of the
   * Tree-sitter CLI. Check the language's version using `ts_language_version`
   * and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and
   * `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants.
   *
   * Throws: `Exception` (via `enforce_compatible_language`) when the language
   * version is outside the supported range.
   */
  auto set_language(in Language language)
  {
    // TODO make set_language private?
    enforce_compatible_language(language);
    return ts_parser_set_language(tsparser, language.tslanguage);
  }

  /** Throws an error if the version of the given language is not compatible */
  void enforce_compatible_language(Language language) const
  {
    import d_tree_sitter.libc : TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION,
      TREE_SITTER_LANGUAGE_VERSION;

    auto language_version = language.get_version();
    if (language_version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
        || language_version > TREE_SITTER_LANGUAGE_VERSION)
    {
      throw new Exception(
          format!"Incompatible language version %d. Expected minimum %d, maximum %d"(language_version,
          TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION, TREE_SITTER_LANGUAGE_VERSION));
    }
  }

  /** Get the parser's current language (a null `Nullable` when none is assigned). */
  auto language() const @nogc nothrow
  {
    auto ptr = ts_parser_language(tsparser);
    if (!ptr)
    {
      return Nullable!Language.init;
    }
    return Nullable!Language(Language(ptr));
  }

  /** Get the parser's current logger. */
  TSLogger* logger() const @nogc nothrow
  {
    // The payload of the C logger struct is handed back reinterpreted as a `TSLogger*`.
    auto current_logger = ts_parser_logger(tsparser);
    return cast(TSLogger*) current_logger.payload;
  }

  // TODO
  // set_logger

  /**
    Set the destination to which the parser should write debugging graphs
    during parsing. The graphs are formatted in the DOT language. You may want
    to pipe these graphs directly to a `dot(1)` process in order to generate
    SVG output.
  */
  auto print_dot_graphs(File file)
  {
    // TODO is file.fileno a raw fd?!
    return ts_parser_print_dot_graphs(tsparser, file.fileno());
  }

  /** Stop the parser from printing debugging graphs while parsing. */
  auto stop_printing_dot_graphs() @nogc nothrow
  {
    // A file descriptor of -1 is the "stop printing" request.
    return ts_parser_print_dot_graphs(tsparser, -1);
  }

  /**
    Use the parser to parse some source code and create a syntax tree.

    If you are parsing this document for the first time, pass `NULL` for the
    `old_tree` parameter. Otherwise, if you have already parsed an earlier
    version of this document and the document has since been edited, pass the
    previous syntax tree so that the unchanged parts of it can be reused.
    This will save time and memory. For this to work correctly, you must have
    already edited the old syntax tree using the `ts_tree_edit` function in a
    way that exactly matches the source code changes.

    The `TSInput` parameter lets you specify how to read the text. It has the
    following three fields:
    1. `read`: A function to retrieve a chunk of text at a given byte offset
       and (row, column) position. The function should return a pointer to the
       text and write its length to the `bytes_read` pointer. The parser does
       not take ownership of this buffer; it just borrows it until it has
       finished reading it. The function should write a zero value to the
       `bytes_read` pointer to indicate the end of the document.
    2. `payload`: An arbitrary pointer that will be passed to each invocation
       of the `read` function.
    3. `encoding`: An indication of how the text is encoded. Either
       `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.

    This function returns a syntax tree on success, and `NULL` on failure. There
    are three possible reasons for failure:
    1. The parser does not have a language assigned. Check for this using the
       `ts_parser_language` function.
    2. Parsing was cancelled due to a timeout that was set by an earlier call to
       the `ts_parser_set_timeout_micros` function. You can resume parsing from
       where the parser left off by calling `ts_parser_parse` again with the
       same arguments. Or you can start parsing from scratch by first calling
       `ts_parser_reset`.
    3. Parsing was cancelled using a cancellation flag that was set by an
       earlier call to `ts_parser_set_cancellation_flag`. You can resume parsing
       from where the parser left off by calling `ts_parser_parse` again with
       the same arguments.
  */
  auto parse(TSInput input, const TSTree* old_tree = Tree.create_empty()) @nogc nothrow
  {
    return ts_parser_parse(tsparser, old_tree, input);
  }

  /**
    Use the parser to parse some source code stored in one contiguous buffer.
    The first two parameters are the same as in the `ts_parser_parse` function
    above. The second two parameters indicate the location of the buffer and its
    length in bytes.
  */
  auto parse(const string source_code, const TSTree* old_tree = Tree.create_empty()) nothrow
  {
    // convert to c string
    const source_code_c = toStringz(source_code);
    // `string` is an array of `char`, so `.length` is already a byte count
    const source_code_length = cast(uint)(source_code.length);
    return ts_parser_parse_string(tsparser, old_tree, source_code_c, source_code_length);
  }

  /**
    Use the parser to parse some source code stored in one contiguous buffer with
    a given encoding. The first four parameters work the same as in the
    `ts_parser_parse_string` method above. The final parameter indicates whether
    the text is encoded as UTF8 or UTF16.
  */
  auto parse(const string source_code, const TSInputEncoding encoding,
      const TSTree* old_tree = Tree.create_empty()) nothrow
  {
    // convert to c string
    const source_code_c = toStringz(source_code);
    const source_code_length = cast(uint)(source_code.length);
    return ts_parser_parse_string_encoding(tsparser, old_tree, source_code_c,
        source_code_length, encoding);
  }

  /**
    Parse the given source_code that is in utf8 encoding
  */
  auto parse_utf8(const string source_code, const TSTree* old_tree = Tree.create_empty()) nothrow
  {
    return parse(source_code, TSInputEncoding.TSInputEncodingUTF8, old_tree);
  }

  /**
    Parse the given source_code that is in utf16 encoding
  */
  auto parse_utf16(const wstring source_code, const TSTree* old_tree = Tree.create_empty()) nothrow @nogc
  {
    // The C API takes a raw byte pointer; reinterpret the UTF-16 buffer in place (no copy).
    const source_code_c = cast(const char*) source_code.ptr;
    // The C API expects the buffer length in BYTES, while `wstring.length` counts
    // UTF-16 code units. Without the multiplication only half of the input is parsed.
    const source_code_length = cast(uint)(source_code.length * wchar.sizeof);
    return ts_parser_parse_string_encoding(tsparser, old_tree, source_code_c,
        source_code_length, TSInputEncoding.TSInputEncodingUTF16);
  }

  /**
    Get the S-expression of the given source code
    Params:
      source_code = the given source code as a string
    Returns: the parsed S-expression
  */
  auto s_expression(const string source_code) nothrow
  {
    auto tree = Tree(parse(source_code));

    // Get the root node of the syntax tree.
    auto root_node = tree.root_node();

    // Print the syntax tree as an S-expression.
    return root_node.to_string();
  }

  /**
    Traverse the [Tree] starting from its root [Node] applying a visitor at all nodes.
    Params:
      source_code = the source code to parse
      visitor = the visitor handed to the root node's `traverse`
  */
  void traverse(const string source_code, TreeVisitor visitor)
  {
    auto tree = Tree(parse(source_code));

    // Get the root node of the syntax tree.
    auto root_node = tree.root_node();

    root_node.traverse(visitor);
  }

  /**
    Traverse the `Tree` starting from its root `Node` and print information about each
    Params:
      source_code = the source code to parse
    Returns: the text accumulated by the `TreePrinter` visitor
  */
  string traverse_print(const string source_code) @trusted
  {
    auto tree = Tree(parse(source_code));

    // Get the root node of the syntax tree.
    auto root_node = tree.root_node();

    // a visitor to print information
    auto visitor = new TreePrinter(source_code);

    root_node.traverse(visitor);

    return cast(string) visitor.tree_string; // convert bc.string.String to string
  }
}